from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Create (or reuse) a local SparkSession for this demo application.
spark = (
    SparkSession.builder
    .master('local')
    .appName('HelloSpark')
    .getOrCreate()
)

# Build a tiny (id, name) DataFrame of cities from an in-memory RDD.
city_rows = [(0, 'Beijing'), (1, 'Shanghai'), (2, 'Guangzhou')]
cities = spark.sparkContext.parallelize(city_rows).toDF(['id', 'name'])

# SQL style: register the UDF under a name so SQL text can call it.
# Declare an explicit return type — without it PySpark defaults to
# StringType, so the doubled ids would come back as strings, not numbers.
spark.udf.register('surprise', lambda x: x * 2, LongType())
cities.createOrReplaceTempView('cities')
spark.sql("SELECT id, surprise(id), name FROM cities").show()

# DataFrame-API style: wrap the lambda with udf() and apply it to Columns.
# Explicit LongType keeps the result column numeric (the default return
# type for udf() is StringType).
surprise = udf(lambda x: x * 2, LongType())
cities.select(col('id'), surprise(col('id')), col('name')).show()

# Release driver/executor resources now that the demo is finished.
spark.stop()
